# -*- coding: utf-8 -*-

# Define here the downloader middlewares for your project
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html

import random
import logging
from scinet.user_agents import user_agent_list

class RandomUserAgentMiddleware(object):
    """Middleware that rotates the User-Agent header on outgoing requests.

    Each request gets a User-Agent drawn at random from ``user_agent_list``,
    and a fallback ``Host`` header when none is set.
    """

    def process_request(self, request, spider):
        """Stamp ``request`` with a random User-Agent and a fallback Host.

        Args:
            request: The outgoing request; its ``headers`` mapping is
                modified in place.
            spider: The spider that issued the request (unused).

        Returns:
            None in every case. (In Scrapy's downloader-middleware contract
            this lets the request continue through the chain.)
        """
        headers = request.headers
        # Always overwrite, so every request gets a freshly chosen agent.
        headers["User-Agent"] = random.choice(user_agent_list)

        # A Host that is already present and non-empty is left untouched.
        if headers.get("Host"):
            return
        headers["Host"] = "blog.sciencenet.cn"

class HandleDataLossMiddleware(object):
    """Middleware that re-issues requests whose response is flagged "dataloss".

    NOTE(review): in Scrapy the "dataloss" flag is expected to appear on
    truncated responses when DOWNLOAD_FAIL_ON_DATALOSS is disabled -- confirm
    against the project settings.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def process_response(self, request, response, spider):
        """Return ``request`` for a retry if ``response`` is flagged "dataloss".

        Args:
            request: The request that produced ``response``. When a retry is
                triggered, its ``dont_filter`` attribute is set to True in
                place.
            response: The downloaded response; its ``flags`` are inspected.
            spider: The spider the response is intended for (unused).

        Returns:
            ``response`` unchanged when it carries no "dataloss" flag;
            otherwise the same ``request`` object, so that it can be
            downloaded again.
        """
        if "dataloss" not in response.flags:
            return response

        # Pass values as logging arguments so the message is only formatted
        # when DEBUG is enabled; the rendered text is the same as before.
        self.logger.debug("<%s>出现DataLoss！", response.url)
        self.logger.debug("返回状态码:%s", response.status)

        # The URL has already been seen once; dont_filter is set so the retry
        # is not dropped as a duplicate request.
        # NOTE(review): there is no retry limit here -- a response that is
        # always flagged "dataloss" would be re-requested indefinitely.
        request.dont_filter = True
        return request